# Top commands ----
# Create empty R application (no figures, data frames, packages, etc.)
# https://stackoverflow.com/questions/7505547/detach-all-packages-while-working-in-r
# Detach every attached package except the ones R attaches at startup
# (stats, graphics, grDevices, utils, datasets, methods, base).
#
# Takes no arguments and is called for its side effect on the search path.
# Returns NULL invisibly.
detachAllPackages <- function() {
  basic.packages <- paste0(
    "package:",
    c("stats", "graphics", "grDevices", "utils", "datasets", "methods", "base")
  )

  # startsWith() gives exactly one TRUE/FALSE per search() entry. The earlier
  # unlist(gregexpr(...)) form returned one value per *match*, so an entry
  # containing "package:" more than once shifted the index and produced an NA
  # that made detach() fail.
  attached <- search()
  package.list <- setdiff(
    attached[startsWith(attached, "package:")],
    basic.packages
  )

  # search() lists the most recently attached packages first, and the loop
  # detaches them in that order.
  for (package in package.list) {
    detach(package, character.only = TRUE)
  }

  invisible(NULL)
}
# Start from a clean session: detach non-base packages, then remove every
# object in the workspace (hidden ones and detachAllPackages itself included).
detachAllPackages()
rm(list = ls(all.names = TRUE))

# FOLDERS
# Relative paths used later in the script resolve against this directory.
setwd("/Users/jonathanlatner/GitHub/latner_2018/")

# PLEASE NOTE: with psidR, the raw data files are named like this:
#   FAMXXXXER.rda
#   INDXXXXER.rda
# where XXXX is the 4-digit year.

# Directory holding the raw .rda files, and the output folder for this script.
raw_data_files <- "/Users/jonathanlatner/Google Drive/PSID/raw_data/R/"
data_files <- "data_files/"

# LIBRARY
library(psidR)
library(tidyverse)
library(beepr)

#  test ----

# famvars <- data.frame(year=c(1969, 1970), 
#                       race_hd=c("V801", "V1490"),
#                       wages_hd=c("V699", "V1191"))
# indvars <- data.frame(year=c(1969, 1970), 
#                       age=c("ER30023", "ER30046"))
# d <- build.panel(datadir = raw_data_files,
#                  fam.vars = famvars, 
#                  ind.vars = indvars, 
#                  current.heads.only = FALSE, 
#                  design = "all")

# Variables ----

# Family-file variable crosswalk, passed to build.panel() as fam.vars.
# One row per survey wave in `year` (annual 1969-1997, then every second year
# 1999-2013; 37 waves) and one column per harmonised variable. Each cell holds
# the variable code to read from the family file of that wave.
# The family-file data frame can contain NAs, e.g. for years where a variable
# is missing and you want to fix that later on somehow.
# NOTE(review): the _hd / _wf suffixes presumably denote head and wife -- confirm
# the meaning of each code against the PSID codebook before changing any entry.
fam_vars <- data.frame(year=c(1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2001, 2003, 2005, 2007, 2009, 2011, 2013), 
                      race_hd=c("V801", "V1490", "V2202", "V2828", "V3300", "V3720", "V4204", "V5096", "V5662", "V6209", "V6802", "V7447", "V8099", "V8723", "V9408", "V11055", "V11938", "V13565", "V14612", "V16086", "V17483", "V18814", "V20114", "V21420", "V23276", "ER3944", "ER6814", "ER9060", "ER11848", "ER15928", "ER19989", "ER23426", "ER27393", "ER40565", "ER46543", "ER51904", "ER57659"),
                      edu_hd_cat = c("V794", "V1485", "V2197", "V2823", "V3241", "V3663", "V4198", "V5074", "V5647", "V6194", "V6787", "V7433", "V8085", "V8709", "V9395", "V11042", "V12400", "V13640", "V14687", "V16161", "V17545", "V18898", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      edu_hd_yrs = c(NA, NA, NA, NA, NA, NA, "V4093", "V4684", "V5608", "V6157", "V6754", "V7387", "V8039", "V8663", "V9349", "V10996", NA, NA, NA, NA, NA, NA, "V20198", "V21504", "V23333", "ER4158", "ER6998", "ER9249", "ER12222", "ER16516", "ER20457", "ER24148", "ER28047", "ER41037", "ER46981", "ER52405", "ER58223"),
                      emp_hd=c("V639", "V1278", "V1983", "V2581", "V3114", "V3528", "V3967", "V4458", "V5373", "V5872", "V6492", "V7095", "V7706", "V8374", "V9005", "V10453", "V11637", "V13046", "V14146", "V15154", "V16655", "V18093", "V19393", "V20693", "V22448", "ER2068", "ER5067", "ER7163", NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      emp_wf=c(NA, NA, NA, NA, NA, NA, NA, "V4841", NA, NA, "V6591", "V7193", "V7879", "V8538", "V9188", "V10671", "V12000", "V13225", "V14321", "V15456", "V16974", "V18395", "V19695", "V20995", "V22801", "ER2562", "ER5561", "ER7657", NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      unemp_hd=c("V656", "V1290", "V1996", "V2594", "V3127", "V3542", "V3995", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ER21317", "ER25306", "ER36311", "ER42338", "ER47651", "ER53351"),
                      unemp_hd_e=c(NA, NA, NA, NA, NA, NA, NA, "V4504", "V5413", "V5902", "V6513", "V7116", "V7739", "V8401", "V9032", "V10557", "V11701", "V13101", "V14199", "V15253", "V16754", "V18192", "V19492", "V20792", "V22569", "ER2188", "ER5187", "ER7283", "ER10199", "ER13330", "ER17353", NA, NA, NA, NA, NA, NA),
                      unemp_hd_u=c(NA, NA, NA, NA, NA, NA, NA, "V4567", "V5469", "V5996", "V6569", "V7171", "V7819", "V8480", "V9117", "V10625", "V11798", "V13194", "V14290", "V15400", "V16915", "V18339", "V19639", "V20939", "V22735", "ER2433", "ER5432", "ER7528", "ER10438", "ER13583", "ER17635", NA, NA, NA, NA, NA, NA),
                      self_emp_hd=c("V641", "V1280", "V1986", "V2584", "V3117", "V3532", "V3970", "V4461", "V5376", "V5875", "V6493", "V7096", "V7707", "V8375", "V9006", "V10456", "V11640", "V13049", "V14149", "V15157", "V16658", "V18096", "V19396", "V20696", "V22451", "ER2074", "ER5073", "ER7169", "ER10086", "ER13210", "ER17221", "ER21147", "ER25129", "ER36134", "ER42169", "ER47482", "ER53182"),
                      self_emp_wf=c(NA, NA, NA, NA, NA, NA, NA, "V4844", NA, NA, "V6592", "V7194", "V7880", "V8539", "V9189", "V10674", "V12003", "V13228", "V14324", "V15459", "V16977", "V18398", "V19698", "V20998", "V22804", "ER2568", "ER5567", "ER7663", "ER10568", "ER13722", "ER17791", "ER21397", "ER25387", "ER36392", "ER42421", "ER47739", "ER53445"),
                      sex_hd=c("V1010", "V1240", "V1943", "V2543", "V3096", "V3509", "V3922", "V4437", "V5351", "V5851", "V6463", "V7068", "V7659", "V8353", "V8962", "V10420", "V11607", "V13012", "V14115", "V15131", "V16632", "V18050", "V19350", "V20652", "V22407", "ER2008", "ER5007", "ER7007", "ER10010", "ER13011", "ER17014", "ER21018", "ER25018", "ER36018", "ER42018", "ER47318", "ER53018"),
                      weight_fam=c("V1014", "V1609", "V2321", "V2968", "V3301", "V3721", "V4224", "V5099", "V5665", "V6212", "V6805", "V7451", "V8103", "V8727", "V9433", "V11079", "V12446", "V13687", "V14737", "V16208", "V17612", "V18943", "V20243", "V21547", "V23361", "ER4160", "ER7000", "ER9251", "ER12084", "ER16518", "ER20394", "ER24179", "ER28078", "ER41069", "ER47012", "ER52436", "ER58257"),
                      weeks_hd_e=c("V658", "V1292", "V1998", "V2596", "V3129", "V3544", "V3998", "V4507", "V5417", "V5904", "V6515", "V7118", "V7741", "V8403", "V9034", "V10561", "V11705", "V13105", "V14203", "V15257", "V16758", "V18196", "V19496", "V20796", "V22575", "ER2222", "ER5221", "ER7317", "ER10231", "ER13362", "ER17391", NA, NA, NA, NA, NA, NA),
                      weeks_hd_u=c(NA, "V1333", "V2039", "V2639", "V3156", "V3572", "V4027", "V4570", "V5473", "V5998", "V6571", "V7173", "V7821", "V8482", "V9119", "V10629", "V11802", "V13198", "V14294", "V15404", "V16919", "V18343", "V19643", "V20943", "V22741", "ER2467", "ER5466", "ER7562", NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      weeks_hd=c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ER4092", "ER6932", "ER9183", "ER12170", "ER16467", "ER20395", "ER24077", "ER27883", "ER40873", "ER46761", "ER52169", "ER57970"),
                      weeks_wf_e=c("V610", "V1368", "V2076", "V2674", "V3185", "V3603", "V4057", "V4607", "V5522", "V6051", "V6611", "V7213", "V7904", "V8562", "V9212", "V10775", "V12068", "V13282", "V14376", "V15559", "V17077", "V18498", "V19798", "V21098", "V22928", "ER2716", "ER5715", "ER7811", NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      weeks_wf_u=c(NA, NA, NA, NA, NA, NA, NA, "V4954", NA, NA, "V6643", "V7245", "V7934", "V8589", "V9248", "V10829", "V12165", "V13366", "V14458", "V15706", "V17238", "V18645", "V19945", "V21245", "V23094", "ER2960", "ER5959", "ER8056", NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      weeks_wf=c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ER4103", "ER6943", "ER9194", "ER12181", "ER16478", "ER20406", "ER24088", "ER27894", "ER40884", "ER46782", "ER52190", "ER57991"),
                      labor_inc_hd=c("V514", "V1196", "V1897", "V2498", "V3051", "V3463", "V3863", "V5031", "V5627", "V6174", "V6767", "V7413", "V8066", "V8690", "V9376", "V11023", "V12372", "V13624", "V14671", "V16145", "V17534", "V18878", "V20178", "V21484", "V23323", "ER4140", "ER6980", "ER9231", "ER12080", "ER16463", "ER20443", "ER24116", "ER27931", "ER40921", "ER46829", "ER52237", "ER58038"),
                      labor_inc_wf=c("V516", "V1198", "V1899", "V2500", "V3053", "V3465", "V3865", "V4379", "V5289", "V5788", "V6398", "V6988", "V7580", "V8273", "V8881", "V10263", "V11404", "V12803", "V13905", "V14920", "V16420", "V17836", "V19136", "V20436", "V23324", "ER4144", "ER6984", "ER9235", "ER12082", "ER16465", "ER20447", "ER24135", "ER27943", "ER40933", "ER46841", "ER52249", "ER58050"),
                      tot_fam_income=c("V529", "V1514", "V2226", "V2852", "V3256", "V3676", "V4154", "V5029", "V5626", "V6173", "V6766", "V7412", "V8065", "V8689", "V9375", "V11022", "V12371", "V13623", "V14670", "V16144", "V17533", "V18875", "V20175", "V21481", "V23322", "ER4153", "ER6993", "ER9244", "ER12079", "ER16462", "ER20456", "ER24099", "ER28037", "ER41027", "ER46935", "ER52343", "ER58152"),
                      wages_hd=c("V699", "V1191", "V1892", "V2493", "V3046", "V3458", "V3858", "V4373", "V5283", "V5782", "V6391", "V6981", "V7573", "V8265", "V8873", "V10256", "V11397", "V12796", "V13898", "V14913", "V16413", "V17829", "V19129", "V20429", "V21739", "ER4122", "ER6962", "ER9213", "ER12196", "ER16493", "ER20425", "ER24117", "ER27913", "ER40903", "ER46811", "ER52219", "ER58020"),
                      hours_hd=c("V465", "V1138", "V1839", "V2439", "V3027", "V3423", "V3823", "V4332", "V5232", "V5731", "V6336", "V6934", "V7530", "V8228", "V8830", "V10037", "V11146", "V12545", "V13745", "V14835", "V16335", "V17744", "V19044", "V20344", "V21634", "ER4096", "ER6936", "ER9187", "ER12174", "ER16471", "ER20399", "ER24080", "ER27886", "ER40876", "ER46767", "ER52175", "ER57976"),
                      hours_wf=c("V475", "V1148", "V1849", "V2449", "V3035", "V3431", "V3831", "V4344", "V5244", "V5743", "V6348", "V6946", "V7540", "V8238", "V8840", "V10131", "V11258", "V12657", "V13809", "V14865", "V16365", "V17774", "V19074", "V20374", "V21670", "ER4107", "ER6947", "ER9198", "ER12185", "ER16482", "ER20410", "ER24091", "ER27897", "ER40887", "ER46788", "ER52196", "ER57997"),
                      marital=c("V607", "V1365", "V2072", "V2670", "V3181", "V3598", "V4053", "V4603", "V5650", "V6197", "V6790", "V7435", "V8087", "V8711", "V9419", "V11065", "V12426", "V13665", "V14712", "V16187", "V17565", "V18916", "V20216", "V21522", "V23336", "ER4159A", "ER6999A", "ER9250A", "ER12223A", "ER16423", "ER20369", "ER24150", "ER28049", "ER41039", "ER46983", "ER52407", "ER58225"),
                      kids=c("V550", "V1242", "V1945", "V2545", "V3098", "V3511", "V3924", "V4439", "V5353", "V5853", "V6465", "V7070", "V7661", "V8355", "V8964", "V10422", "V11609", "V13014", "V14117", "V15133", "V16634", "V18052", "V19352", "V20654", "V22409", "ER2010", "ER5009", "ER7009", "ER10012", "ER13013", "ER17016", "ER21020", "ER25020", "ER36020", "ER42020", "ER47320", "ER53020"),
                      fam_size=c("V549", "V1238", "V1941", "V2541", "V3094", "V3507", "V3920", "V4435", "V5349", "V5849", "V6461", "V7066", "V7657", "V8351", "V8960", "V10418", "V11605", "V13010", "V14113", "V15129", "V16630", "V18048", "V19348", "V20650", "V22405", "ER2006", "ER5005", "ER7005", "ER10008", "ER13009", "ER17012", "ER21016", "ER25016", "ER36016", "ER42016", "ER47316", "ER53016"),
                      prs_job_mths = c(NA, NA, NA, NA, NA, NA, NA, "V4488", "V5397", "V5888", "V6499", "V7102", "V7722", "V8390", "V9021", "V10520", "V11669", "V13069", "V14167", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      prs_emp_cat = c("V642", "V1281", "V1987", "V2585", "V3118", "V3533", "V3984", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      prs_emp_mths = c(NA, NA, NA, NA, NA, NA, NA, "V4480", "V5384", "V5941", NA, NA, "V7711", "V8379", "V9010", "V10519", "V11668", "V13068", "V14166", "V15181", "V16682", "V18120", "V19420", "V20720", "V22489", "ER2099", "ER5098", "ER7194", "ER10118", "ER13244", "ER17255", "ER21172", "ER25161", "ER36166", "ER42201", "ER47514", "ER53214"),
                      prs_emp_yrs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ER2098", "ER5097", "ER7193", "ER10117", "ER13243", "ER17254", "ER21171", "ER25160", "ER36165", "ER42200", "ER47513", "ER53213"),
                      prs_emp_wks = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ER2100", "ER5099", "ER7195", "ER10119", "ER13245", "ER17256", "ER21173", "ER25162", "ER36167", "ER42202", "ER47515", "ER53215"),
                      unemp_hd_hrs = c("V469", "V1142", "V1843", "V2443", "V3031", "V3427", "V3827", "V4338", "V5240", "V5739", "V6344", "V6942", "V7538", "V8236", "V8838", "V10045", "V11153", "V12552", "V13752", "V14842", "V16342", "V17751", "V19051", "V20351", "V21638", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      unemp_hd_wks = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ER4101", "ER6941", "ER9192", "ER12179", "ER16476", "ER20404", "ER24086", "ER27892", "ER40882", "ER46778", "ER52186", "ER57987"),
                      unemp_wf_hrs = c(NA, NA, NA, NA, NA, NA, NA, "V4727", "V5252", "V5751", "V6356", "V6954", "V7548", "V8246", "V8848", "V10139", "V11265", "V12664", "V13816", "V14872", "V16372", "V17781", "V19081", "V20381", "V21674", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                      unemp_wf_wks = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ER4112", "ER6952", "ER9203", "ER12190", "ER16487", "ER20415", "ER24097", "ER27903", "ER40893", "ER46799", "ER52207", "ER58008"),
                      why_lst_jb_end_hd_u = c("V651", "V1332", "V2038", "V2638", "V3155", "V3571", "V4026", "V4556", "V5458", "V5986", "V6559", "V7161", "V7809", "V8470", "V9107", "V10609", "V11764", "V13160", "V14256", "V15328", "V16843", "V18267", "V19567", "V20867", "V22655", "ER4034", "ER6874", "ER9125", "ER12102", "ER13498", "ER17538", "ER21184", "ER25173", "ER36178", "ER42211", "ER47524", "ER53224"),
                      why_lst_jb_end_hd_e = c("V643", "V1282", "V1988", "V2586", "V3119", "V3534", "V3986", "V4490", "V5399", "V5890", "V6501", "V7104", "V7727", "V8391", "V9022", "V10539", "V11679", "V13079", "V14177", "V15240", "V16741", "V18179", "V19479", "V20779", "V22551", "ER4023", "ER6863", "ER9114", "ER12091", "ER13310", "ER17321", NA, NA, NA, NA, NA, NA),
                      move = c("V603", "V1274", "V1979", "V2577", "V3110", "V3524", "V3941", "V4452", "V5366", "V5866", "V6484", "V7089", "V7700", "V8369", "V8999", "V10447", "V11628", "V13037", "V14140", "V15148", "V16649", "V18087", "V19387", "V20687", "V22441", "ER2062", "ER5061", "ER7155", "ER10072", "ER13077", "ER17088", "ER21117", "ER25098", "ER36103", "ER42132", "ER47440", "ER53140"),
                      move_why = c("V604", "V1275", "V1980", "V2578", "V3111", "V3525", "V3943", "V4454", "V5368", "V5868", "V6486", "V7091", "V7702", "V8370", "V9001", "V10449", "V11630", "V13039", "V14142", "V15150", "V16651", "V18089", "V19389", "V20689", "V22444", "ER2065", "ER5064", "ER7158", "ER10075", "ER13080", "ER17091", "ER21120", "ER25101", "ER36106", "ER42135", "ER47443", "ER53143"),
                      move_year = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "V22443", "ER2064", "ER5063", "ER7157", "ER10074", "ER13079", "ER17090", "ER21119", "ER25100", "ER36105", "ER42134", "ER47442", "ER53142")
                      )

# Individual-index variable crosswalk, passed to build.panel() as ind.vars:
# one row per survey wave, one column per harmonised variable, each cell the
# variable code to read for that wave.
# NOTE(review): an earlier note here said these columns cannot contain NAs,
# yet emp_ind and edu_ind_yrs do -- confirm the installed psidR accepts that.
ind_vars <- data.frame(
  # Annual waves 1969-1997, then every second year 1999-2013 (37 in total).
  year = c(1969:1997, seq(1999, 2013, by = 2)),
  age = c(
    "ER30023", "ER30046", "ER30070", "ER30094", "ER30120", "ER30141",
    "ER30163", "ER30191", "ER30220", "ER30249", "ER30286", "ER30316",
    "ER30346", "ER30376", "ER30402", "ER30432", "ER30466", "ER30501",
    "ER30538", "ER30573", "ER30609", "ER30645", "ER30692", "ER30736",
    "ER30809", "ER33104", "ER33204", "ER33304", "ER33404", "ER33504",
    "ER33604", "ER33704", "ER33804", "ER33904", "ER34004", "ER34104",
    "ER34204"
  ),
  emp_ind = c(
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    "ER30293", "ER30323", "ER30353", "ER30382", "ER30411", "ER30441",
    "ER30474", "ER30509", "ER30545", "ER30580", "ER30616", "ER30653",
    "ER30699", "ER30744", "ER30816", "ER33111", "ER33211", "ER33311",
    "ER33411", "ER33512", "ER33612", "ER33712", "ER33813", "ER33913",
    "ER34016", "ER34116", "ER34216"
  ),
  edu_ind_yrs = c(
    NA,
    "ER30052", "ER30076", "ER30100", "ER30126", "ER30147", "ER30169",
    "ER30197", "ER30226", "ER30255", "ER30296", "ER30326", "ER30356",
    "ER30384", "ER30413", "ER30443", "ER30478", "ER30513", "ER30549",
    "ER30584", "ER30620", "ER30657", "ER30703", "ER30748", "ER30820",
    "ER33115", "ER33215", "ER33315", "ER33415", "ER33516", "ER33616",
    "ER33716", "ER33817", "ER33917", "ER34020", "ER34119", "ER34230"
  ),
  weight_ind_long = c(
    "ER30042", "ER30066", "ER30090", "ER30116", "ER30137", "ER30159",
    "ER30187", "ER30216", "ER30245", "ER30282", "ER30312", "ER30342",
    "ER30372", "ER30398", "ER30428", "ER30462", "ER30497", "ER30534",
    "ER30569", "ER30605", "ER30641", "ER30686", "ER30730", "ER30803",
    "ER30864", "ER33119", "ER33275", "ER33318", "ER33430", "ER33546",
    "ER33637", "ER33740", "ER33848", "ER33950", "ER34045", "ER34154",
    "ER34268"
  ),
  # The same code is listed for every wave.
  gender = rep("ER32000", 37)
)

# Build the panel from the raw files in raw_data_files, using the family and
# individual crosswalks defined above.
df_psid <- build.panel(
  datadir = raw_data_files,
  fam.vars = fam_vars,
  ind.vars = ind_vars,
  current.heads.only = FALSE,
  design = "all"
)

# The crosswalks are only needed for the build.panel() call.
rm(fam_vars, ind_vars)

# Tidy the panel: call the interview number `household`, sort by person and
# year, keep rows whose relation.head is not 0, and move the identifier
# columns to the front.
# NOTE(review): dplyr's filter() also drops rows where the condition is NA,
# so rows with a missing relation.head are removed as well -- confirm intended.
df_psid <- df_psid %>%
  rename(household = interview) %>%
  arrange(pid, year) %>%
  filter(relation.head != 0) %>%
  select(
    pid, year, sequence, ID1968, relation.head, pernum, household, age, gender,
    everything()
  )

# Save ----

saveRDS(df_psid, file = paste0(data_files, "psid.rds"))

